1 Introduction

The aims of this notebook are to (1) download and read the 10X Multiome data for two mantle cell lymphomas (MCL) presented in tonsils, (2) perform basic quality control to exclude cells and doublets, (3) reduce the dimensionality of the data (PCA, UMAP), (4) cluster cells into meaningful cell states, and (4) annotate these cell states based on their top marker genes.

2 Pre-processing

2.1 Load packages

library(Seurat)
library(tidyverse)
library(scDblFinder)
library(DT)
library(ggpubr)
library(here)
library(glue)
library(Polychrome)

2.2 Define functions and parameters

plot_histogram_qc <- function(df, x, x_lab) {
  df %>%
    ggplot(aes_string(x)) +
    geom_histogram(bins = 100) +
    labs(x = x_lab, y = "Number of Cells") +
    theme_pubr()
}
process_seurat <- function(seurat_obj, dims = 1:30, reduction = "pca") {
  seurat_obj <- seurat_obj %>%
    NormalizeData() %>%
    FindVariableFeatures() %>%
    ScaleData() %>%
    RunPCA() %>%
    RunUMAP(dims = dims, reduction = reduction) %>%
    FindNeighbors(dims = dims, reduction = reduction)
  seurat_obj
}

# Thresholds
min_lib_size <- 900
min_n_genes <- 700
max_pct_mt_102 <- 27.5
max_pct_mt_413 <- 20
chrY_cutoff_102 <- -0.32
chrY_cutoff_413 <- -0.1

3 Download and read data

The expression and accessibility matrices for these two MCL patients are available in this Zenodo respository. We can download the data from the terminal using wget as follows:

cd MCL
mkdir -p data
wget -P data https://zenodo.org/record/6678331/files/TonsilAtlasCellRangerOuts.zip
cd data
unzip TonsilAtlasCellRangerOuts.zip

Then, we can proceed to load the data into R. Note that in this project we only focus on the RNA matrices. The accessibility information will be used in another project. We will also proceed to read the metadata:

# Read metadata
metadata <- read_csv(
  here("MCL/data/TonsilAtlasCellRangerOuts/MCL/metadata/cellranger_arc_metadata.csv"),
  col_names = TRUE
)
metadata <- metadata[str_detect(metadata$library_name, "pos"), ]
metadata <- metadata[metadata$type == "RNA", ]
DT::datatable(metadata)
gem_ids <- unique(metadata$gem_id)


# Read expression matrices and convert them to Seurat objects
path_to_mats <- here(glue("MCL/data/TonsilAtlasCellRangerOuts/MCL/BCLLATLAS_64_65/{gem_ids}/filtered_feature_bc_matrix"))
names(path_to_mats) <- gem_ids
se_l <- map(gem_ids, \(x) {
  counts <- Read10X(path_to_mats[x])$`Gene Expression`
  seurat_obj <- CreateSeuratObject(counts)
  seurat_obj$gem_id <- x
  new_metadata <- seurat_obj@meta.data %>%
    rownames_to_column("barcode") %>%
    left_join(metadata, by = "gem_id") %>%
    as.data.frame()
  rownames(new_metadata) <- new_metadata$barcode
  seurat_obj@meta.data <- new_metadata
  seurat_obj
})
metadata <- as.data.frame(metadata)
rownames(metadata) <- metadata$gem_id
names(se_l) <- metadata[gem_ids, "donor_id"]

4 Quality control

4.1 Calculate QC metrics

se_l <- map(se_l, \(seurat_obj) {
  seurat_obj$pct_mt <- PercentageFeatureSet(seurat_obj, pattern = "^MT-")
  seurat_obj$pct_ribo <- PercentageFeatureSet(seurat_obj, pattern = "^(RPS|RPL)")
  seurat_obj
})

4.2 Visualize

# QC metrics across samples
qc_df <- map_df(se_l, \(seurat_obj) seurat_obj@meta.data)
qc_metrics_rna <- c("nCount_RNA", "nFeature_RNA", "pct_mt")
qc_ggs_rna <- map(qc_metrics_rna, \(x) {
  p <- ggplot(qc_df, aes_string("library_name", x, fill = "donor_id")) +
    geom_violin() +
    xlab("") +
    theme_classic() +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
      legend.position = "none"
    )
  p
})
qc_ggs_rna[[1]] <- qc_ggs_rna[[1]] +
  scale_y_log10() +
  ylab("Library Size (total number of UMI)")
qc_ggs_rna[[2]] <- qc_ggs_rna[[2]] +
  ylab("Number of detected features")
qc_ggs_rna[[3]] <- qc_ggs_rna[[3]] +
  ylab("Percentage of mitochondrial expression (%)")
qc_ggs_rna
## [[1]]

## 
## [[2]]

## 
## [[3]]

We can clearly see that both samples have a different distribution of QC metrics. Following the best practices, we will determine the thresholds for both samples separately. In particular, we see that sample M102 has a higher mitochondrial expression that one would expect for a single-nuclei assay. According to this technical note from Cellranger: Even in 10x assays with single nuclei as input (for example, Multiome assay), an elevated level of mitochondrial genes would mean that mitochondrial RNA could remain “stuck” to nuclear membranes or may get partitioned into GEMs and be detected by the gene expression biochemistry.

4.3 Determine thresholds

# Library size
(lib_size_hists <- map(se_l, \(seurat_obj) {
  p <- plot_histogram_qc(
    seurat_obj@meta.data,
    x = "nCount_RNA",
    x_lab = "Library Size (total number of UMI)"
  )
  p +
    scale_x_log10() +
    geom_vline(xintercept = min_lib_size, color = "red", linetype = "dashed")
}))
## $M413

## 
## $M102

# Number of detected genes
(n_genes_hists <- map(se_l, \(seurat_obj) {
  p <- plot_histogram_qc(
    seurat_obj@meta.data,
    x = "nFeature_RNA",
    x_lab = "Number of detected features"
  )
  p +
    geom_vline(xintercept = min_n_genes, color = "red", linetype = "dashed")
}))
## $M413

## 
## $M102

# Percentage of mitochondrial expresion
pct_mt_hists <- map(se_l, \(seurat_obj) {
  p <- plot_histogram_qc(
    seurat_obj@meta.data,
    x = "pct_mt",
    x_lab = "Percentage of mitochondrial expression (%)"
  )
  p
})
pct_mt_hists[[1]] <- pct_mt_hists[[1]] +
  geom_vline(xintercept = max_pct_mt_413, color = "red", linetype = "dashed")
pct_mt_hists[[2]] <- pct_mt_hists[[2]] +
  geom_vline(xintercept = max_pct_mt_102, color = "red", linetype = "dashed")
pct_mt_hists
## $M413

## 
## $M102

Let us visualize the QC metrics jointly:

# Number of genes vs total UMI colored by pct_mt
(joint_gg1 <- map(se_l, \(seurat_obj) {
  p <- seurat_obj@meta.data %>%
    ggplot(aes(nCount_RNA, nFeature_RNA, color = pct_mt)) +
      geom_point() +
      geom_vline(xintercept = min_lib_size, color = "red", linetype = "dashed") +
      geom_hline(yintercept = min_n_genes, color = "red", linetype = "dashed") +
      labs(
        x = "Library Size (total number of UMI)",
        y = "Number of detected features",
        color = "Percentage of mitochondrial expression (%)"
      ) +
      scale_color_viridis_c(option = "magma") +
      theme_pubr()
  p
}))
## $M413

## 
## $M102

# % mitochondrial expression vs number of detected genes
joint_gg2 <- map(se_l, \(seurat_obj) {
  p <- seurat_obj@meta.data %>%
    ggplot(aes(nFeature_RNA, pct_mt)) +
      geom_point(size = 0.25, alpha = 0.5) +
      geom_vline(xintercept = min_n_genes, color = "red", linetype = "dashed") +
      labs(
        x = "Number of detected features",
        y = "Percentage of mitochondrial expression (%)"
      ) +
      theme_pubr()
  p
})
joint_gg2$M413 <- joint_gg2$M413 +
  geom_hline(yintercept = max_pct_mt_413, color = "red", linetype = "dashed")
joint_gg2$M102 <- joint_gg2$M102 +
  geom_hline(yintercept = max_pct_mt_102, color = "red", linetype = "dashed")
joint_gg2
## $M413

## 
## $M102

4.4 Subset

# 102
is_low_quality_102 <-
  se_l$M102$nCount_RNA < min_lib_size |
  se_l$M102$nFeature_RNA < min_n_genes |
  se_l$M102$pct_mt > max_pct_mt_102
table(is_low_quality_102)
## is_low_quality_102
## FALSE  TRUE 
## 10103   965
se_l$M102 <- se_l$M102[, !is_low_quality_102]


# 413
is_low_quality_413 <-
  se_l$M413$nCount_RNA < min_lib_size |
  se_l$M413$nFeature_RNA < min_n_genes |
  se_l$M413$pct_mt > max_pct_mt_413
table(is_low_quality_413)
## is_low_quality_413
## FALSE  TRUE 
## 11074  2001
se_l$M413 <- se_l$M413[, !is_low_quality_413]

5 Dimensionality reduction

We follow the dimensionality reduction pipeline from Seurat. This includes (1) finding highly variable genes, (2) scaling data, (3) running principal component analysis (PCA), (4) running UMAP based on PC coordinates, (5) embedding cells in a K-nearest neighbors graph (representing cells as principal components).

se_l <- map(se_l, process_seurat)
map(se_l, DimPlot, reduction = "umap")
## $M413

## 
## $M102

5.1 Doublets

We follow the “Doublet detection by simulation” section from the book “Orchestrating Single-cell Analysis With Bioconductor”:

se_l <- map(se_l, \(seurat_obj) {
  sce <- as.SingleCellExperiment(seurat_obj)
  seurat_obj$doublet_score <- computeDoubletDensity(sce, dims = 30, k = 20)
  seurat_obj
})
map(se_l, \(seurat_obj) {
  p <- FeaturePlot(seurat_obj, "doublet_score") +
    scale_color_viridis_c(option = "magma")
  p
})
## $M413

## 
## $M102

6 Cluster and annotate

The first step will be to cluster cells and identify clusters of doublets using multiple sources of evidence (QC metrics, markers, scDblFinder). Second, we will cluster and annotate cells to a specific cell state.

6.1 M102

se_l$M102 <- FindClusters(se_l$M102, resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 10103
## Number of edges: 360948
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8462
## Number of communities: 9
## Elapsed time: 1 seconds
cols <- kelly.colors(length(levels(se_l$M102$seurat_clusters)) + 1)
names(cols) <- NULL
cols <- cols[2:length(cols)]
DimPlot(se_l$M102, cols = cols)

We see that cluster 1 seems to correspond to the cluster that also has a high doublet score. Let us assess the distribution of QC metrics:

vln_qc_clusters <- VlnPlot(
  se_l$M102,
  c("nCount_RNA", "nFeature_RNA", "pct_mt", "doublet_score"),
  pt.size = 0,
  ncol = 2,
  cols = cols
)
vln_qc_clusters[[1]] <- vln_qc_clusters[[1]] +
  scale_y_log10()
vln_qc_clusters

In fact, we see that both cluster 1 and cluster 6 have a disproportionately high number of counts, detected genes, and doublet score. We proceed to find markers for these clusters:

markers_1_6 <- map(c("1", "6"), \(x) {
  df <- FindMarkers(
    se_l$M102,
    ident.1 = x,
    only.pos = TRUE
  )
  head(df, 40)
})
markers_1_6
## [[1]]
##                    p_val avg_log2FC pct.1 pct.2    p_val_adj
## EEPD1      4.119048e-102  0.3016145 0.472 0.180 1.507613e-97
## SLC44A5     6.159026e-94  0.3499780 0.317 0.105 2.254265e-89
## GFOD1       2.689827e-90  0.2545392 0.471 0.187 9.845035e-86
## ATRNL1      3.621253e-80  0.2787145 0.475 0.202 1.325415e-75
## AC090125.1  8.104562e-77  0.2591640 0.401 0.163 2.966351e-72
## ZNF804A     1.072725e-76  0.4079666 0.999 0.944 3.926282e-72
## LY75        1.863314e-71  0.2524651 0.580 0.272 6.819915e-67
## PDE3B       1.295099e-70  0.2598599 0.691 0.361 4.740191e-66
## EXT1        1.777397e-68  0.4033100 0.915 0.645 6.505451e-64
## ADARB2      1.597433e-65  0.3141427 0.670 0.365 5.846764e-61
## PLCL2       2.586330e-65  0.3983377 0.980 0.807 9.466227e-61
## IGF2BP3     5.507840e-64  0.2675866 0.755 0.419 2.015925e-59
## ROR1        6.122316e-64  0.3488344 0.993 0.914 2.240829e-59
## HS2ST1      8.852155e-63  0.2678228 0.658 0.349 3.239977e-58
## TMEM65      1.554270e-62  0.2811789 0.707 0.384 5.688783e-58
## LTBP1       5.804061e-61  0.2826305 0.710 0.389 2.124344e-56
## MAN1A1      2.896576e-58  0.3578664 0.944 0.705 1.060176e-53
## MSI2        8.199324e-57  0.3115818 0.994 0.918 3.001034e-52
## OSBPL3      1.967148e-56  0.3376223 0.922 0.657 7.199958e-52
## AL136962.1  2.220951e-55  0.3249047 0.834 0.540 8.128903e-51
## KHDRBS2     1.146607e-54  0.2590180 0.743 0.439 4.196695e-50
## PSD3        2.425444e-54  0.2600765 0.786 0.458 8.877367e-50
## PLCB1       4.209754e-54  0.3420264 0.710 0.424 1.540812e-49
## CMSS1       1.376715e-53  0.2512005 0.766 0.466 5.038913e-49
## CCDC88A     1.049070e-52  0.3145087 0.812 0.498 3.839702e-48
## GALNT1      1.288633e-49  0.2683721 0.792 0.466 4.716525e-45
## TCF4        1.347879e-49  0.3171343 0.968 0.807 4.933372e-45
## JAZF1       5.592023e-49  0.3383225 0.961 0.775 2.046736e-44
## GAB2        2.950968e-47  0.2881779 0.988 0.867 1.080084e-42
## MALT1       1.671822e-46  0.2516282 0.884 0.592 6.119037e-42
## RGS12       1.996903e-46  0.2500715 0.904 0.638 7.308864e-42
## ITFG1       5.612126e-46  0.3003210 0.864 0.595 2.054094e-41
## ZNRF2       2.118019e-44  0.2521834 0.827 0.521 7.752162e-40
## TSHZ2       4.747068e-43  0.2744857 0.950 0.710 1.737474e-38
## TRPS1       6.198662e-43  0.2680878 0.927 0.694 2.268772e-38
## ITPR2       5.034643e-41  0.2743261 0.934 0.693 1.842730e-36
## IMMP2L      5.191785e-41  0.2521681 0.899 0.654 1.900245e-36
## FNDC3A      6.565079e-41  0.2582182 0.900 0.623 2.402885e-36
## TMEM163     5.364602e-40  0.2707752 0.956 0.765 1.963498e-35
## MCTP2       1.108299e-39  0.2744925 0.872 0.595 4.056486e-35
## 
## [[2]]
##                  p_val avg_log2FC pct.1 pct.2     p_val_adj
## CDCA8     0.000000e+00  0.4979704 0.289 0.006  0.000000e+00
## KIF2C     0.000000e+00  0.7385616 0.403 0.014  0.000000e+00
## NUF2      0.000000e+00  0.9097619 0.505 0.025  0.000000e+00
## ASPM      0.000000e+00  1.7927932 0.669 0.016  0.000000e+00
## KIF14     0.000000e+00  0.9325764 0.462 0.014  0.000000e+00
## CENPF     0.000000e+00  2.0241955 0.695 0.041  0.000000e+00
## RRM2      0.000000e+00  1.5572361 0.649 0.018  0.000000e+00
## NCAPH     0.000000e+00  0.6819171 0.364 0.009  0.000000e+00
## CKAP2L    0.000000e+00  0.6660459 0.321 0.003  0.000000e+00
## SPC25     0.000000e+00  0.5726783 0.275 0.004  0.000000e+00
## KIF15     0.000000e+00  0.9093094 0.446 0.018  0.000000e+00
## POLQ      0.000000e+00  1.6414247 0.695 0.049  0.000000e+00
## ECT2      0.000000e+00  0.8235383 0.361 0.011  0.000000e+00
## NCAPG     0.000000e+00  0.8516475 0.446 0.011  0.000000e+00
## HIST1H3B  0.000000e+00  1.0575366 0.426 0.009  0.000000e+00
## HIST1H1B  0.000000e+00  1.5948073 0.636 0.052  0.000000e+00
## KIFC1     0.000000e+00  0.7923254 0.380 0.008  0.000000e+00
## ANLN      0.000000e+00  0.7054958 0.348 0.006  0.000000e+00
## NCAPG2    0.000000e+00  1.5227158 0.708 0.058  0.000000e+00
## ESCO2     0.000000e+00  0.5126381 0.243 0.004  0.000000e+00
## MELK      0.000000e+00  0.8686628 0.439 0.014  0.000000e+00
## CDK1      0.000000e+00  0.5857226 0.298 0.004  0.000000e+00
## KIF11     0.000000e+00  1.1009219 0.538 0.019  0.000000e+00
## MKI67     0.000000e+00  2.0348210 0.823 0.016  0.000000e+00
## E2F8      0.000000e+00  0.5736032 0.249 0.002  0.000000e+00
## CDCA5     0.000000e+00  0.6658284 0.331 0.011  0.000000e+00
## DIAPH3    0.000000e+00  1.9461695 0.767 0.041  0.000000e+00
## DLGAP5    0.000000e+00  0.4790188 0.246 0.003  0.000000e+00
## BUB1B     0.000000e+00  0.7597472 0.384 0.011  0.000000e+00
## NUSAP1    0.000000e+00  1.5931659 0.757 0.037  0.000000e+00
## KIF23     0.000000e+00  0.6590418 0.305 0.004  0.000000e+00
## SHCBP1    0.000000e+00  1.0745399 0.541 0.017  0.000000e+00
## AURKB     0.000000e+00  0.5470624 0.272 0.003  0.000000e+00
## TOP2A     0.000000e+00  0.9309511 0.413 0.011  0.000000e+00
## KIF18B    0.000000e+00  0.5943851 0.275 0.004  0.000000e+00
## BIRC5     0.000000e+00  0.8907318 0.430 0.005  0.000000e+00
## TPX2      0.000000e+00  0.7740614 0.390 0.011  0.000000e+00
## GTSE1     0.000000e+00  0.9687339 0.479 0.006  0.000000e+00
## HJURP    6.782255e-301  0.3503057 0.187 0.002 2.482373e-296
## ESPL1    1.176614e-297  0.5432808 0.282 0.008 4.306524e-293

Cluster 1 does not have any specific marker (all markers have a very low log2 FC). Cluster 6, on the other hand, consists of proliferative cells, which is consistent with the high number of counts and features. Let us calculate a per-cell S phase and G2M phase score:

se_l <- map(se_l, \(seurat_obj) {
  seurat_obj <- CellCycleScoring(
    seurat_obj,
    s.features = cc.genes.updated.2019$s.genes,
    g2m.features = cc.genes.updated.2019$g2m.genes
  )
  seurat_obj
})
map(se_l, FeaturePlot, features = c("S.Score", "G2M.Score"))
## $M413

## 
## $M102

Finally, we run the findDoubletClusters from scDblFinder, which identifies clusters with an expression profile lying between two other clusters:

sce_M102 <- as.SingleCellExperiment(se_l$M102)
dbl_out <- findDoubletClusters(
  sce_M102,
  clusters = sce_M102$seurat_clusters
)
dbl_out
## DataFrame with 9 rows and 9 columns
##       source1     source2    num.de median.de        best     p.value lib.size1 lib.size2       prop
##   <character> <character> <integer> <numeric> <character>   <numeric> <numeric> <numeric>  <numeric>
## 7           8           5        26    1530.0       ATXN1 2.53992e-09  1.049385  1.422903 0.00772048
## 0           7           1        86     535.5      MT-CO2 5.23716e-22  0.654574  2.219932 0.40473127
## 2           7           1        86     745.5   LINC01250 1.68773e-40  0.652915  2.214306 0.14668910
## 3           6           5       114     609.0       TSHZ2 3.06402e-35  1.844963  0.895737 0.09650599
## 5           7           3       171     936.0   LINC01250 7.21497e-62  0.702789  1.116399 0.07156290
## 8           7           4       236    2159.5         FYN 3.15284e-24  0.952939  1.681463 0.00752252
## 4           7           6       347    1112.5       ABTB2 1.33414e-86  0.566732  1.660963 0.08383648
## 1           7           6       395    1277.0       TSHZ2 7.55530e-32  0.294862  0.864174 0.15124221
## 6           8           1       625    1212.5       MKI67 2.45876e-75  0.358057  1.157174 0.03018905

We see that cluster 8 consists of T-cells (contamination / doublets):

FeaturePlot(se_l$M102, "CD3D")

VlnPlot(se_l$M102, "CD3D", cols = cols) + NoLegend()

markers_8 <- FindMarkers(
  se_l$M102,
  ident.1 = "8",
  only.pos = TRUE,
  logfc.threshold = 1
)
head(markers_8, 20)
##            p_val avg_log2FC pct.1 pct.2 p_val_adj
## TGFBR3         0   2.023740 0.382 0.005         0
## CD2            0   1.824516 0.421 0.000         0
## CD247          0   2.866330 0.645 0.003         0
## STAT4          0   3.486855 0.724 0.017         0
## CD28           0   2.038401 0.553 0.012         0
## CTLA4          0   1.319968 0.289 0.001         0
## ICOS           0   2.520687 0.539 0.002         0
## IKZF2          0   2.335888 0.382 0.001         0
## TNIK           0   3.303764 0.632 0.002         0
## GPRIN3         0   2.874834 0.632 0.014         0
## LEF1           0   1.911537 0.263 0.001         0
## INPP4B         0   3.156366 0.671 0.002         0
## IL7R           0   1.925463 0.395 0.001         0
## GZMK           0   1.400449 0.197 0.000         0
## PAM            0   2.577763 0.645 0.002         0
## CAMK4          0   2.665704 0.487 0.002         0
## ITK            0   2.808106 0.645 0.008         0
## AC010609.1     0   1.117180 0.250 0.001         0
## THEMIS         0   2.183010 0.289 0.001         0
## TRBC1          0   1.460282 0.224 0.001         0

With all the evidence gathered, we proceed to exclude cluster 1 and 8:

se_l$M102 <- se_l$M102[, !(se_l$M102$seurat_clusters %in% c("1", "8"))]
DimPlot(se_l$M102, cols = cols)

We now reprocess the dataset and cluster cells into meaningful cell states:

se_l$M102 <- process_seurat(se_l$M102)
DimPlot(se_l$M102, cols = cols)

We know from exploratory analysis that the main driver of intratumoral transcriptional heterogeneity in MCL are copy number alterations. In particular, we detected a loss of chromosome Y (among others) using infercnv. We visualize the expression of genes encoded in the Y chromosome, collapse their expression into a per cell score (“chr Y score”) and classify them in chr Y+ or chr Y- using this score:

# Visualize expression of genes encoded in chromosome Y
goi_chrY <- c("UTY", "KDM5D", "DDX3Y", "USP9Y", "ZFY", "EIF1AY")
FeaturePlot(se_l$M102, features = goi_chrY) &
  scale_color_viridis_c(option = "magma") &
  NoLegend()

# Score cells based on chromosome Y expression
se_l$M102 <- AddModuleScore(
  se_l$M102,
  features = list(goi_chrY),
  name = "chrY_score"
)
FeaturePlot(se_l$M102, "chrY_score1") &
  scale_color_viridis_c(option = "magma")

# Classify cells in chr Y + and chr Y -
(density_gg <- se_l$M102@meta.data %>%
  ggplot(aes(chrY_score1)) +
  geom_density() +
  geom_vline(xintercept = chrY_cutoff_102, linetype = "dashed", color = "darkblue") +
  theme_classic())

se_l$M102$has_loss_chrY <- ifelse(
  se_l$M102$chrY_score1 > chrY_cutoff_102,
  "chrY+",
  "chrY-"
)
DimPlot(se_l$M102, group.by = "has_loss_chrY")

Strategy: identify broad clusters (chrY+ and -), subcluster inside each of them with FindSubCluster:

se_l$M102 <- FindClusters(se_l$M102, resolution = 0.05)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8499
## Number of edges: 308665
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9663
## Number of communities: 3
## Elapsed time: 0 seconds
DimPlot(se_l$M102)

# Subcluster cluster 0 (chr Y+)
se_l$M102 <- FindSubCluster(
  se_l$M102,
  graph.name = "RNA_snn",
  resolution = 0.6,
  cluster = "0",
  subcluster.name = "subclusters_0"
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5901
## Number of edges: 209385
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7618
## Number of communities: 7
## Elapsed time: 0 seconds
DimPlot(se_l$M102, group.by = "subclusters_0")

# Subcluster cluster 1 (chr Y-)
se_l$M102$clusters_20230625 <- se_l$M102$subclusters_0
Idents(se_l$M102) <- "clusters_20230625"
se_l$M102 <- FindSubCluster(
  se_l$M102,
  graph.name = "RNA_snn",
  resolution = 0.6,
  cluster = "1",
  subcluster.name = "clusters_20230625"
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2292
## Number of edges: 85715
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7027
## Number of communities: 5
## Elapsed time: 0 seconds
DimPlot(se_l$M102, group.by = "clusters_20230625")

# Further subcluster to get non-tumoral cells
Idents(se_l$M102) <- "clusters_20230625"
se_l$M102 <- FindSubCluster(
  se_l$M102,
  graph.name = "RNA_snn",
  resolution = 0.1,
  cluster = "1_4",
  subcluster.name = "clusters_20230625"
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 80
## Number of edges: 1431
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9126
## Number of communities: 2
## Elapsed time: 0 seconds
DimPlot(se_l$M102, group.by = "clusters_20230625")

Idents(se_l$M102) <- "clusters_20230625"

6.2 Markers and annotation

# Markers subclusters cluster 0
se_clus0 <- se_l$M102[, str_detect(se_l$M102$clusters_20230625, "^0")]
DimPlot(se_clus0)

markers_0 <- FindAllMarkers(se_clus0, only.pos = TRUE, logfc.threshold = 0.75)
markers_0 <- markers_0[markers_0$p_val_adj < 0.001, ]
DT::datatable(markers_0, options = list(scrollX = TRUE))
# Markers subclusters cluster 1
se_clus1 <- se_l$M102[, str_detect(se_l$M102$clusters_20230625, "^1")]
Idents(se_clus1) <- "clusters_20230625"
DimPlot(se_clus1)

markers_1 <- FindAllMarkers(se_clus1, only.pos = TRUE, logfc.threshold = 0.75)
markers_1 <- markers_1[markers_1$p_val_adj < 0.001, ]
DT::datatable(markers_1, options = list(scrollX = TRUE))
Idents(se_l$M102) <- "clusters_20230625"

Annotation:

Subcluster Markers Annotation
0_0 TCF4, RGS12 c1 TCF4Hi RGS12Hi
0_1 MT1G, MT1E, MT1X, MT2A c1 metallothionein
0_2 MT1G, MT1E, MT1X, MT2A c1 metallothionein
0_3 MYC, NFKB1, MIR155HG c1 MYC+NFKB1+MIR155HG+
0_4 CD69, JUN, FOS c1 CD69+AP1+
0_5 TUBB, CENPP c1 S phase
0_6 CCL22, NME1 c1 CCL22+NME1+
1_0 MT1G, MT1E, MT1X, MT2A c2 metallothionein
1_1 CD69, JUN, FOS c2 CD69+AP1+
1_2 TCF4, RGS12 c2 TCF4Hi RGS12Hi
1_3 MYC, NFKB1, MIR155HG c2 MYC+NFKB1+MIR155HG+
1_4_0 NR3C2 c2 NR3C2+
1_4_1 SOX11-, CCND1- c3 non-tumoral B-cells
2 MKI67, CENPF c1 G2M Phase
current_levels <- c("0_0", "0_1", "0_2", "0_3", "0_4", "0_5", "0_6", "1_0", "1_1",
                    "1_2", "1_3", "1_4_0", "1_4_1", "2")
new_levels <- c("c1 TCF4Hi RGS12Hi", "c1 metallothionein", "c1 metallothionein",
                "c1 MYC+NFKB1+MIR155HG+", "c1 CD69+AP1+", "c1 S phase",
                "c1 CCL22+NME1+", "c2 metallothionein", "c2 CD69+AP1+",
                "c2 TCF4Hi RGS12Hi", "c2 MYC+NFKB1+MIR155HG+", "c2 NR3C2+",
                "c3 non-tumoral B-cells", "c1 G2M Phase")
names(new_levels) <- current_levels

se_l$M102$annotation_20230625 <- new_levels[se_l$M102$clusters_20230625]
Idents(se_l$M102) <- "annotation_20230625"
cols <- kelly.colors(length(new_levels) + 1)
names(cols) <- NULL
cols <- cols[2:length(cols)]
DimPlot(se_l$M102, pt.size = 0.85, cols = cols)

Plot markers

goi <- c(goi_chrY, "TCF4", "RGS12", "MT1G", "MT1E", "MT1X", "MT2A", "MYC",
         "NFKB1", "MIR155HG", "CD69", "JUN", "FOS", "TUBB", "CENPP", "CCL22",
         "NME1", "NR3C2", "SOX11", "CCND1", "MKI67", "CENPF")
dot_plot <- DotPlot(se_l$M102, features = rev(goi)) +
  scale_color_distiller(palette = "Blues", direction = 1) +
  theme(
    axis.title = element_blank(),
    axis.text.y = element_text(size = 8),
    legend.text = element_text(size = 7),
    legend.title = element_text(size = 7),
    axis.text.x = element_text(size = 8, angle = 90, hjust = 1, vjust = 1)
  )
dot_plot

6.3 M413

We follow the same process for the second donor (M413):

se_l$M413 <- FindClusters(se_l$M413, resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 11074
## Number of edges: 399988
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8898
## Number of communities: 13
## Elapsed time: 1 seconds
cols <- kelly.colors(length(levels(se_l$M413$seurat_clusters)) + 1)
names(cols) <- NULL
cols <- cols[2:length(cols)]

vln_qc_clusters <- VlnPlot(
  se_l$M413,
  c("nCount_RNA", "nFeature_RNA", "pct_mt", "doublet_score"),
  pt.size = 0,
  ncol = 2,
  cols = cols
)
vln_qc_clusters[[1]] <- vln_qc_clusters[[1]] +
  scale_y_log10()
vln_qc_clusters

umap_dbl_score <- FeaturePlot(se_l$M413, "doublet_score") +
  scale_color_viridis_c(option = "magma")
umap_clusters <- DimPlot(se_l$M413, group.by = "seurat_clusters", cols = cols)
umap_clusters | umap_dbl_score # cluster 4 seems to be composed of doublets

FeaturePlot(se_l$M413, c("CD3E", "CD3D")) # cluster 9 might be T cells

markers_4_9 <- map(c("4", "9"), \(x) {
  df <- FindMarkers(
    se_l$M413,
    ident.1 = x,
    only.pos = TRUE
  )
  head(df, 40)
})
map(markers_4_9, head, 30)
## [[1]]
##                    p_val avg_log2FC pct.1 pct.2     p_val_adj
## NEB        1.489423e-193  0.8185946 0.885 0.335 5.451437e-189
## RASSF6     6.840967e-192  0.7913718 0.662 0.190 2.503862e-187
## LINC01170  3.009270e-187  0.7972957 0.446 0.097 1.101423e-182
## PCDH9      4.679998e-184  0.5076865 0.594 0.156 1.712926e-179
## CLNK       2.964332e-169  0.5101595 0.615 0.173 1.084975e-164
## LINC01876  2.604968e-168  0.4905777 0.426 0.094 9.534442e-164
## ZNF331     9.665181e-154  0.9075758 0.669 0.242 3.537553e-149
## SAMD12     7.070175e-150  0.4835986 0.706 0.234 2.587755e-145
## LHFPL2     9.661333e-146  0.6648814 0.952 0.460 3.536145e-141
## AC011029.1 2.085981e-138  0.4222857 0.275 0.050 7.634900e-134
## CHPT1      3.015170e-129  0.7396944 0.740 0.288 1.103582e-124
## LINC01811  6.944307e-129  0.4236081 0.425 0.114 2.541686e-124
## DIP2C      4.072152e-124  0.4898166 0.622 0.214 1.490448e-119
## IFNG-AS1   1.991248e-123  0.5814476 0.925 0.454 7.288167e-119
## AL589693.1 1.471374e-120  0.4877610 0.702 0.266 5.385377e-116
## POF1B      7.695993e-119  0.2904005 0.479 0.141 2.816810e-114
## KHDRBS3    6.644928e-117  0.4994470 0.250 0.049 2.432110e-112
## CRY1       8.937637e-114  0.3976285 0.455 0.137 3.271265e-109
## ZNF10      2.886105e-113  0.5467851 0.613 0.224 1.056343e-108
## LINC01949  9.888942e-109  0.2903443 0.291 0.067 3.619452e-104
## NR4A2      7.925605e-108  0.4388300 0.410 0.122 2.900851e-103
## CHL1       2.644286e-106  0.2986969 0.307 0.075 9.678352e-102
## BCL2L11    4.110751e-104  0.4535714 0.474 0.156  1.504576e-99
## PPP1R9A    4.373997e-104  0.3943337 0.288 0.068  1.600927e-99
## IRS2       7.435251e-102  0.2777543 0.208 0.039  2.721376e-97
## RGS2       1.814152e-101  0.4177176 0.446 0.145  6.639977e-97
## TNFRSF21   6.634843e-101  0.2778408 0.232 0.048  2.428419e-96
## C12orf74    1.370321e-99  0.3635609 0.549 0.193  5.015511e-95
## NR4A3       2.671434e-99  0.2874947 0.309 0.080  9.777714e-95
## KCNQ1       1.571569e-97  0.2772179 0.468 0.152  5.752099e-93
## 
## [[2]]
##            p_val avg_log2FC pct.1 pct.2 p_val_adj
## CD2            0  1.6735759 0.419 0.004         0
## CD247          0  2.5755216 0.648 0.011         0
## HNRNPLL        0  1.9191762 0.512 0.011         0
## AAK1           0  2.2796268 0.615 0.038         0
## LINC01934      0  3.2191254 0.548 0.041         0
## PLCL1          0  2.9147021 0.505 0.017         0
## CD28           0  1.1446739 0.289 0.002         0
## ICOS           0  1.4612454 0.316 0.003         0
## TRAT1          0  0.8116984 0.186 0.002         0
## CD96           0  2.5146420 0.714 0.029         0
## TNIK           0  2.3195393 0.538 0.020         0
## DTHD1          0  1.7384070 0.329 0.004         0
## PTPN13         0  1.3406290 0.226 0.004         0
## GPRIN3         0  2.3136307 0.575 0.017         0
## LEF1           0  1.6233189 0.302 0.004         0
## INPP4B         0  3.2944994 0.718 0.013         0
## AC139720.1     0  1.6323853 0.229 0.004         0
## IL7R           0  1.1029533 0.226 0.002         0
## FYB1           0  3.1914259 0.827 0.039         0
## PAM            0  2.4210930 0.585 0.027         0
## CAMK4          0  1.8793804 0.405 0.015         0
## PPP2R2B        0  1.8381960 0.299 0.006         0
## ITK            0  1.8560248 0.482 0.010         0
## LCP2           0  1.8670422 0.548 0.025         0
## THEMIS         0  3.4031080 0.721 0.011         0
## TOX            0  3.1708965 0.801 0.083         0
## TTC39B         0  1.6936300 0.419 0.017         0
## MLLT3          0  2.1506749 0.472 0.031         0
## PRKCQ          0  1.5945736 0.415 0.006         0
## ST8SIA1        0  3.3122906 0.751 0.011         0
# scDblFinder
sce_M413 <- as.SingleCellExperiment(se_l$M413)
dbl_out <- findDoubletClusters(
  sce_M413,
  clusters = sce_M413$seurat_clusters
)
dbl_out
## DataFrame with 13 rows and 9 columns
##         source1     source2    num.de median.de        best      p.value lib.size1 lib.size2       prop
##     <character> <character> <integer> <numeric> <character>    <numeric> <numeric> <numeric>  <numeric>
## 0            12           5        13     463.0    HSP90AA1  1.64605e-09  1.009592  1.785401 0.31497201
## 12           11           3        40     303.5      HSPA1B  1.26195e-33  1.820129  1.063446 0.00758534
## 2            12          11        46     603.5       TSHZ2  1.06602e-41  1.002583  1.824830 0.10032509
## 3            12           4        67     974.5      HSPA1B  5.28304e-16  0.940339  2.564506 0.08994040
## 11           12           2       100     381.5       SKAP1  1.28777e-21  0.549412  0.547996 0.01354524
## ...         ...         ...       ...       ...         ...          ...       ...       ...        ...
## 4            12           1       409    1436.0        CD58  1.60262e-09  0.366675  0.418753  0.0831678
## 10           12           9       413    1184.5      FNDC3B  6.05853e-53  0.272392  0.321256  0.0135452
## 6            12           8       440    1359.5    MIR155HG  1.27779e-35  0.773832  1.501558  0.0469568
## 9            11          10       540    1126.5       PRKCH  6.34361e-90  1.543282  3.112780  0.0271808
## 8            12           7      1357    2414.5  AC023590.1 9.99730e-146  0.515353  0.545560  0.0273614
# Exclude cluster 4 (doublets) and 9 (T cells) and reprocess
se_l$M413 <- se_l$M413[, !(se_l$M413$seurat_clusters %in% c("4", "9"))]
DimPlot(se_l$M413, cols = cols)

se_l$M413 <- process_seurat(se_l$M413)
DimPlot(se_l$M413, cols = cols)

Similar as before, we classify cells into chrY+ and chrY-:

# Visualize expression of genes encoded in chromosome Y
FeaturePlot(se_l$M413, features = goi_chrY) &
  scale_color_viridis_c(option = "magma") &
  NoLegend()

# Score cells based on chromosome Y expression
se_l$M413 <- AddModuleScore(
  se_l$M413,
  features = list(goi_chrY),
  name = "chrY_score"
)
FeaturePlot(se_l$M413, "chrY_score1") &
  scale_color_viridis_c(option = "magma")

# Classify cells in chr Y + and chr Y -
(density_gg <- se_l$M413@meta.data %>%
  ggplot(aes(chrY_score1)) +
  geom_density() +
  geom_vline(xintercept = chrY_cutoff_413, linetype = "dashed", color = "darkblue") +
  theme_classic())

se_l$M413$has_loss_chrY <- ifelse(
  se_l$M413$chrY_score1 > chrY_cutoff_413,
  "chrY+",
  "chrY-"
)
DimPlot(se_l$M413, group.by = "has_loss_chrY")

Cluster:

se_l$M413 <- FindClusters(se_l$M413, resolution = 0.05)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 9852
## Number of edges: 366652
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9702
## Number of communities: 5
## Elapsed time: 1 seconds
DimPlot(se_l$M413)

# Subcluster cluster 0 (chr Y+)
se_l$M413 <- FindSubCluster(
  se_l$M413,
  graph.name = "RNA_snn",
  resolution = 0.5,
  cluster = "0",
  subcluster.name = "subclusters_0"
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 6709
## Number of edges: 241651
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8174
## Number of communities: 8
## Elapsed time: 0 seconds
DimPlot(se_l$M413, group.by = "subclusters_0")

# Further subcluster cluster 0_2
Idents(se_l$M413) <- "subclusters_0"
se_l$M413 <- FindSubCluster(
  se_l$M413,
  graph.name = "RNA_snn",
  resolution = 0.2,
  cluster = "0_2",
  subcluster.name = "clusters_20230625"
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1103
## Number of edges: 35158
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8297
## Number of communities: 2
## Elapsed time: 0 seconds
DimPlot(se_l$M413, group.by = "clusters_20230625")

Idents(se_l$M413) <- "clusters_20230625"


# Finally, let us further subcluster cluster 3, which contains normal GCBC
se_l$M413 <- FindSubCluster(
  se_l$M413,
  graph.name = "RNA_snn",
  resolution = 0.1,
  cluster = "2",
  subcluster.name = "clusters_20230625"
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 556
## Number of edges: 17577
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9406
## Number of communities: 2
## Elapsed time: 0 seconds
cols <- kelly.colors(length(unique(se_l$M413$clusters_20230625)) + 1)
names(cols) <- NULL
cols <- cols[2:length(cols)]
DimPlot(se_l$M413, group.by = "clusters_20230625", cols = cols)

Idents(se_l$M413) <- "clusters_20230625"

6.4 Markers and annotation

markers_413 <- FindAllMarkers(se_l$M413, only.pos = TRUE, logfc.threshold = 0.75)
markers_413 <- markers_413[markers_413$p_val_adj < 0.001, ]
DT::datatable(markers_413, options = list(scrollX = TRUE))

Annotation:

Subcluster Markers Annotation
0_0 chrY-, CD69lo, CXCR4lo, JUNB- c1 chrY-CD69LoCXCR4LoJUNB-
0_1 chrY+, CD69lo, CXCR4lo, JUNB- c1 chrY+CD69LoCXCR4LoJUNB-
0_2_0 CD69, CXCR4, JUNB c1 CD69Hi CXCR4HiJUNB+
0_2_1 CD69, CXCR4, JUNB, TSHZ2, MECOM c1 CD69Hi CXCR4HiJUNB+TSHZ2+MECOM+
0_3 TSHZ2, MECOM c1 TSHZ2+MECOM+
0_4 CENPK, POLQ c1 S phase
0_5 MYC, MIR155HG, NFKB1 c1 MYC+MIR155HG+NFKB1Hi
0_6 MT-ND2, MT-ND3, MT-ATP8 poor-quality
0_7 HSPD1, HSP90AB1, HSPA1A poor-quality
1 PCDH9 c2 PCDH9 Hi
2_0 MEF2B, RGS13 GCBC
2_1 CENPF, CENPE c1 G2M phase
3 CD96, TOX, SOX5 NBC/MBC
4 XBP1, PRDM1 PC
current_levels <- c("0_0", "0_1", "0_2_0", "0_2_1", "0_3", "0_4", "0_5", "0_6",
                    "0_7", "1", "2_0", "2_1", "3", "4")
new_levels <- c("c1 chrY-CD69LoCXCR4LoJUNB-", "c1 chrY+CD69LoCXCR4LoJUNB-",
                "c1 CD69Hi CXCR4HiJUNB+", "c1 CD69Hi CXCR4HiJUNB+TSHZ2+MECOM+",
                "c1 TSHZ2+MECOM+", "c1 S phase", "c1 MYC+MIR155HG+NFKB1Hi",
                "poor-quality", "poor-quality", "c2 PCDH9 Hi", "GCBC", "c1 G2M phase",
                "NBC/MBC", "PC")
names(new_levels) <- current_levels
se_l$M413$annotation_20230625 <- new_levels[se_l$M413$clusters_20230625]
Idents(se_l$M413) <- "annotation_20230625"
cols <- kelly.colors(length(new_levels) + 1)
names(cols) <- NULL
cols <- cols[2:length(cols)]
DimPlot(se_l$M413, pt.size = 0.85, cols = cols)

# Remove cluster 6 and reprocess
se_l$M413 <- se_l$M413[, se_l$M413$annotation_20230625 != "poor-quality"]
se_l$M413 <- process_seurat(se_l$M413)
DimPlot(se_l$M413, pt.size = 0.85, cols = cols)

Plot markers

goi_413 <- c("CCND1", "SOX11", "CD69", "FOS", "ZNF331", "RGS2" , "MECOM",
             "TSHZ2", "JUNB", "CXCR4", "CCL3", "PCDH9", "MIR155HG", "NFKB1",
             "PCNA", "TOP2A", "MCM6", "MKI67", "MYC", "IRF4", "PRDM1", "BCL6",
             "MEF2B", "PLAC8", "SOX5")
dot_plot <- DotPlot(se_l$M413, features = rev(goi_413)) +
  scale_color_distiller(palette = "Blues", direction = 1) +
  theme(
    axis.title = element_blank(),
    axis.text.y = element_text(size = 8),
    legend.text = element_text(size = 7),
    legend.title = element_text(size = 7),
    axis.text.x = element_text(size = 8, angle = 90, hjust = 1, vjust = 1)
  )
dot_plot

7 Save

dir.create(here("MCL/results/R_objects"), recursive = TRUE)
saveRDS(se_l, here("MCL/results/R_objects/20230625_seurat_list_MCL_M102_M413_tumoral_cells_annotated.rds"))

8 Session Information

sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-conda-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: /home/groups/singlecell/rmassoni/anaconda3/envs/richter/lib/libopenblasp-r0.3.21.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=es_ES.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=es_ES.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] Polychrome_1.5.1            glue_1.6.2                  here_1.0.1                  ggpubr_0.6.0                DT_0.27                     scDblFinder_1.12.0          SingleCellExperiment_1.20.0 SummarizedExperiment_1.28.0 Biobase_2.58.0              GenomicRanges_1.50.0        GenomeInfoDb_1.34.9         IRanges_2.32.0              S4Vectors_0.36.0            BiocGenerics_0.44.0         MatrixGenerics_1.10.0       matrixStats_0.63.0          forcats_1.0.0               stringr_1.5.0               dplyr_1.1.0                 purrr_1.0.1                 readr_2.1.4                 tidyr_1.3.0                 tibble_3.1.8                ggplot2_3.4.1               tidyverse_1.3.2             SeuratObject_4.1.3          Seurat_4.3.0                BiocStyle_2.26.0           
## 
## loaded via a namespace (and not attached):
##   [1] rtracklayer_1.58.0        scattermore_0.8           R.methodsS3_1.8.2         bit64_4.0.5               knitr_1.42                R.utils_2.12.2            irlba_2.3.5.1             DelayedArray_0.24.0       data.table_1.14.6         RCurl_1.98-1.10           generics_0.1.3            ScaledMatrix_1.6.0        cowplot_1.1.1             RANN_2.6.1                future_1.31.0             bit_4.0.5                 tzdb_0.3.0                spatstat.data_3.0-0       xml2_1.3.3                lubridate_1.9.1           httpuv_1.6.9              assertthat_0.2.1          viridis_0.6.2             gargle_1.3.0              xfun_0.37                 hms_1.1.2                 jquerylib_0.1.4           evaluate_0.20             promises_1.2.0.1          fansi_1.0.4               restfulr_0.0.15           dbplyr_2.3.0              readxl_1.4.2              igraph_1.3.5              DBI_1.1.3                 htmlwidgets_1.6.1         spatstat.geom_3.0-6       googledrive_2.0.0         ellipsis_0.3.2            crosstalk_1.2.0           backports_1.4.1           bookdown_0.33             deldir_1.0-6              sparseMatrixStats_1.10.0  vctrs_0.5.2              
##  [46] ROCR_1.0-11               abind_1.4-5               cachem_1.0.6              withr_2.5.0               progressr_0.13.0          vroom_1.6.1               sctransform_0.3.5         GenomicAlignments_1.34.0  scran_1.26.2              goftest_1.2-3             cluster_2.1.4             lazyeval_0.2.2            crayon_1.5.2              spatstat.explore_3.0-6    labeling_0.4.2            edgeR_3.40.2              pkgconfig_2.0.3           nlme_3.1-162              vipor_0.4.5               rlang_1.0.6               globals_0.16.2            lifecycle_1.0.3           miniUI_0.1.1.1            modelr_0.1.10             rsvd_1.0.5                ggrastr_1.0.1             cellranger_1.1.0          rprojroot_2.0.3           polyclip_1.10-4           lmtest_0.9-40             Matrix_1.5-3              carData_3.0-5             zoo_1.8-11                reprex_2.0.2              beeswarm_0.4.0            ggridges_0.5.4            googlesheets4_1.0.1       png_0.1-8                 viridisLite_0.4.1         rjson_0.2.21              bitops_1.0-7              R.oo_1.25.0               KernSmooth_2.23-20        Biostrings_2.66.0         DelayedMatrixStats_1.20.0
##  [91] parallelly_1.34.0         spatstat.random_3.1-3     rstatix_0.7.2             ggsignif_0.6.4            beachmat_2.14.2           scales_1.2.1              magrittr_2.0.3            plyr_1.8.8                ica_1.0-3                 zlibbioc_1.44.0           compiler_4.2.2            dqrng_0.3.0               BiocIO_1.8.0              RColorBrewer_1.1-3        fitdistrplus_1.1-8        Rsamtools_2.14.0          cli_3.6.0                 XVector_0.38.0            listenv_0.9.0             patchwork_1.1.2           pbapply_1.7-0             MASS_7.3-58.2             tidyselect_1.2.0          stringi_1.7.12            highr_0.10                yaml_2.3.7                BiocSingular_1.14.0       locfit_1.5-9.7            ggrepel_0.9.3             grid_4.2.2                sass_0.4.5                tools_4.2.2               timechange_0.2.0          future.apply_1.10.0       parallel_4.2.2            bluster_1.8.0             metapod_1.6.0             gridExtra_2.3             farver_2.1.1              scatterplot3d_0.3-44      Rtsne_0.16                digest_0.6.31             BiocManager_1.30.19       shiny_1.7.4               Rcpp_1.0.10              
## [136] car_3.1-2                 broom_1.0.3               scuttle_1.8.4             later_1.3.0               RcppAnnoy_0.0.20          httr_1.4.4                colorspace_2.1-0          rvest_1.0.3               XML_3.99-0.13             fs_1.6.1                  tensor_1.5                reticulate_1.26           splines_4.2.2             uwot_0.1.14               statmod_1.5.0             spatstat.utils_3.0-1      scater_1.26.1             sp_1.6-0                  xgboost_1.7.5.1           plotly_4.10.1             xtable_1.8-4              jsonlite_1.8.4            R6_2.5.1                  pillar_1.8.1              htmltools_0.5.4           mime_0.12                 fastmap_1.1.0             BiocParallel_1.32.5       BiocNeighbors_1.16.0      codetools_0.2-19          utf8_1.2.3                lattice_0.20-45           bslib_0.4.2               spatstat.sparse_3.0-0     ggbeeswarm_0.7.1          leiden_0.4.3              survival_3.5-3            limma_3.54.0              rmarkdown_2.20            munsell_0.5.0             GenomeInfoDbData_1.2.9    haven_2.5.1               reshape2_1.4.4            gtable_0.3.1